1 Loading in Data sets + Library packages.

## 
## The downloaded binary packages are in
##  /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//RtmpKLSGxg/downloaded_packages
## 
## The downloaded binary packages are in
##  /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//RtmpKLSGxg/downloaded_packages
## 
## The downloaded binary packages are in
##  /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//RtmpKLSGxg/downloaded_packages
options(future.globals.maxSize = 74 * 1024^3) # 55 GB
getOption("future.globals.maxSize") #59055800320
## [1] 79456894976
load(here("jk_code", "JK_clean_MD_2.rds"))

2 Top genes

Idents(SO4) <- "supergroup"

type_markers <- FindAllMarkers(SO4, 
                          only.pos = TRUE,
                          logfc.threshold = 0.1,
                          min.pct = 0.1,
                          min.diff.pct = 0.05,
                          return.thresh = 0.05)
## Calculating cluster type_1
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster type_2
## Calculating cluster type_3
## Calculating cluster type_4
## Calculating cluster type_5
## Calculating cluster type_6
type_markers <- type_markers[
  !grepl("^(mt-|Rp|Gm)|Rik$", rownames(type_markers)),
  ,
  drop = FALSE
]
type_1_markers <- type_markers[type_markers$cluster == "type_1", ]
type_2_markers <- type_markers[type_markers$cluster == "type_2", ]
type_3_markers <- type_markers[type_markers$cluster == "type_3", ]
type_4_markers <- type_markers[type_markers$cluster == "type_4", ]
type_5_markers <- type_markers[type_markers$cluster == "type_5", ]
type_6_markers <- type_markers[type_markers$cluster == "type_6", ]

2.1 Type 1

df<- type_1_markers %>% arrange(desc(avg_log2FC))


df2 <- df %>% filter(p_val_adj < 0.05)


DEG_list <- df2

markers1 <- DEG_list %>% mutate(SYMBOL = gene)


ENTREZ_list <- bitr(
  geneID = DEG_list$gene,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Mm.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = DEG_list$gene, fromType = "SYMBOL", toType =
## "ENTREZID", : 1.03% of input gene IDs are fail to map...
markers1 <-  ENTREZ_list %>% inner_join(markers1, by = "SYMBOL")

# Removing genes that are not statistically significant. 
markers1 <-  markers1 %>% dplyr::filter(p_val_adj < 0.05)
#head(markers, n = 50)

pos.markers1 <-  markers1 %>% dplyr::filter(avg_log2FC > 0.4) %>% arrange(desc(abs(avg_log2FC))) 
#head(pos.markers, n = 50)

pos.ranks1 <- pos.markers1$ENTREZID[abs(pos.markers1$avg_log2FC) > 0]
#head(pos.ranks)

pos_go1 <- enrichGO(gene = pos.ranks1,           #a vector of entrez gene id
                   OrgDb = "org.Mm.eg.db",    
                   ont = "BP",
                   readable = TRUE)              #whether mapping gene ID to gene Name

pos_go1
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:240] "19283" "239435" "94214" "23850" "333050" "94191" "233271" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...234 enriched terms found
## 'data.frame':    234 obs. of  12 variables:
##  $ ID            : chr  "GO:0001822" "GO:0072001" "GO:0072073" "GO:0030198" ...
##  $ Description   : chr  "kidney development" "renal system development" "kidney epithelium development" "extracellular matrix organization" ...
##  $ GeneRatio     : chr  "19/230" "19/230" "11/230" "14/230" ...
##  $ BgRatio       : chr  "375/28928" "390/28928" "173/28928" "333/28928" ...
##  $ RichFactor    : num  0.0507 0.0487 0.0636 0.042 0.0419 ...
##  $ FoldEnrichment: num  6.37 6.13 8 5.29 5.27 ...
##  $ zScore        : num  9.37 9.13 8.26 7.05 7.03 ...
##  $ pvalue        : num  2.09e-10 4.04e-10 1.54e-07 5.20e-07 5.39e-07 ...
##  $ p.adjust      : num  6.59e-07 6.59e-07 1.67e-04 3.04e-04 3.04e-04 ...
##  $ qvalue        : num  5.28e-07 5.28e-07 1.34e-04 2.43e-04 2.43e-04 ...
##  $ geneID        : chr  "Gfra1/Bmper/Bmp2/Robo2/Nrp1/Cdkn1c/Cited1/Ass1/Col4a4/Col4a3/Zfp950/Enpp1/Irx1/Gdf11/Bcl2/Hoxd11/Hoxb7/Irx2/Magi2" "Gfra1/Bmper/Bmp2/Robo2/Nrp1/Cdkn1c/Cited1/Ass1/Col4a4/Col4a3/Zfp950/Enpp1/Irx1/Gdf11/Bcl2/Hoxd11/Hoxb7/Irx2/Magi2" "Bmper/Bmp2/Robo2/Cited1/Irx1/Gdf11/Bcl2/Hoxd11/Hoxb7/Irx2/Magi2" "Spock2/Ccdc80/Ramp2/Bmp2/Slc39a8/Scara3/P3h4/Mmp14/Wdr72/Col4a4/Col4a3/Gpm6b/Sema5a/Prdx4" ...
##  $ Count         : int  19 19 11 14 14 14 16 6 5 5 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
 chart1 <- dotplot(pos_go1) +
    ggtitle("type 1") +
    theme_classic() + 
    theme(
        plot.title = element_text(hjust = 0.5),
        legend.position = "left",
        axis.text.y = element_text(hjust = 0, size = 10)) +
    scale_y_discrete(position = "right", 
                     labels = function(x) str_wrap(x, width = 25))  # Wrap y-axis labels to 2 lines
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
 chart1

2.1.1 Viewing genes Type 1

VlnPlot(SO4, c("Pappa2","Ramp3","Itga4"))
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#filtered_genes <- markers[abs(markers$pct.1 - markers$pct.2) > 0.3, ]

2.2 Type 2

df<- type_2_markers %>% arrange(desc(avg_log2FC))


df2 <- df %>% filter(p_val_adj < 0.05)


DEG_list <- df2

markers2 <- DEG_list %>% mutate(SYMBOL = gene)


ENTREZ_list <- bitr(
  geneID = DEG_list$gene,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Mm.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = DEG_list$gene, fromType = "SYMBOL", toType =
## "ENTREZID", : 1.04% of input gene IDs are fail to map...
markers2 <-  ENTREZ_list %>% inner_join(markers2, by = "SYMBOL")

# Removing genes that are not statistically significant. 
markers2 <-  markers2 %>% dplyr::filter(p_val_adj < 0.05)
#head(markers, n = 50)

pos.markers2 <-  markers2 %>% dplyr::filter(avg_log2FC > 0.9) %>%  arrange(desc(abs(avg_log2FC)))
#head(pos.markers, n = 50)

pos.ranks2 <- pos.markers2$ENTREZID[abs(pos.markers2$avg_log2FC) > 0]
#head(pos.ranks)

pos_go2 <- enrichGO(gene = pos.ranks2,           #a vector of entrez gene id
                   OrgDb = "org.Mm.eg.db",    
                   ont = "BP",
                   readable = TRUE)              #whether mapping gene ID to gene Name

pos_go2
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:61] "14282" "13653" "12702" "11910" "14281" "16598" "16477" "22695" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...457 enriched terms found
## 'data.frame':    457 obs. of  12 variables:
##  $ ID            : chr  "GO:0010563" "GO:0045936" "GO:0001933" "GO:0031400" ...
##  $ Description   : chr  "negative regulation of phosphorus metabolic process" "negative regulation of phosphate metabolic process" "negative regulation of protein phosphorylation" "negative regulation of protein modification process" ...
##  $ GeneRatio     : chr  "12/57" "12/57" "11/57" "12/57" ...
##  $ BgRatio       : chr  "388/28928" "388/28928" "316/28928" "420/28928" ...
##  $ RichFactor    : num  0.0309 0.0309 0.0348 0.0286 0.0326 ...
##  $ FoldEnrichment: num  15.7 15.7 17.7 14.5 16.6 ...
##  $ zScore        : num  12.9 12.9 13.2 12.4 12.8 ...
##  $ pvalue        : num  1.18e-11 1.18e-11 2.62e-11 2.94e-11 5.22e-11 ...
##  $ p.adjust      : num  1.07e-08 1.07e-08 1.33e-08 1.33e-08 1.89e-08 ...
##  $ qvalue        : num  5.84e-09 5.84e-09 7.31e-09 7.31e-09 1.04e-08 ...
##  $ geneID        : chr  "Socs3/Dusp1/Jun/Gadd45b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Ddit4/Gadd45a" "Socs3/Dusp1/Jun/Gadd45b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Ddit4/Gadd45a" "Socs3/Dusp1/Jun/Gadd45b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Gadd45a" "Socs3/Dusp1/Jun/Gadd45b/Hspa1b/Ppp1r15a/Irf1/Hspb1/Gadd45g/Errfi1/Trib1/Gadd45a" ...
##  $ Count         : int  12 12 11 12 11 12 8 7 7 5 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
chart2 <- dotplot(pos_go2) +
    ggtitle("type 2") +
    theme_classic() + 
    theme(
        plot.title = element_text(hjust = 0.5),
        legend.position = "left",
        axis.text.y = element_text(hjust = 0, size = 10)) +
    scale_y_discrete(position = "right", 
                     labels = function(x) str_wrap(x, width = 25))  # Wrap y-axis labels to 2 li
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
chart2

2.2.1 Type 2 Gene View

VlnPlot(SO4, features = c("Egf","Umod","Cldn19","Foxq1","Cxcl12"))

2.3 Type 3

df<- type_3_markers %>% arrange(desc(avg_log2FC))


df2 <- df %>% filter(p_val_adj < 0.05)


DEG_list <- df2

markers3 <- DEG_list %>% mutate(SYMBOL = gene)


ENTREZ_list <- bitr(
  geneID = DEG_list$gene,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Mm.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = DEG_list$gene, fromType = "SYMBOL", toType =
## "ENTREZID", : 1.73% of input gene IDs are fail to map...
markers3 <-  ENTREZ_list %>% inner_join(markers3, by = "SYMBOL")

# Removing genes that are not statistically significant. 
markers3 <-  markers3 %>% dplyr::filter(p_val_adj < 0.05)
#head(markers, n = 50)

pos.markers3 <-  markers3 %>% dplyr::filter(avg_log2FC > 0.8) %>%  arrange(desc(abs(avg_log2FC)))
#head(pos.markers, n = 50)

pos.ranks3 <- pos.markers3$ENTREZID[abs(pos.markers3$avg_log2FC) > 0]
#head(pos.ranks)

pos_go3 <- enrichGO(gene = pos.ranks3,           #a vector of entrez gene id
                   OrgDb = "org.Mm.eg.db",    
                   ont = "BP",
                   readable = TRUE)              #whether mapping gene ID to gene Name

pos_go3
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:217] "16513" "330428" "268379" "53315" "15220" "69824" "13645" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...345 enriched terms found
## 'data.frame':    345 obs. of  12 variables:
##  $ ID            : chr  "GO:0031589" "GO:0010810" "GO:0043603" "GO:0015980" ...
##  $ Description   : chr  "cell-substrate adhesion" "regulation of cell-substrate adhesion" "amide metabolic process" "energy derivation by oxidation of organic compounds" ...
##  $ GeneRatio     : chr  "18/209" "13/209" "16/209" "14/209" ...
##  $ BgRatio       : chr  "377/28928" "235/28928" "466/28928" "380/28928" ...
##  $ RichFactor    : num  0.0477 0.0553 0.0343 0.0368 0.039 ...
##  $ FoldEnrichment: num  6.61 7.66 4.75 5.1 5.4 ...
##  $ zScore        : num  9.35 8.74 6.97 6.86 6.89 ...
##  $ pvalue        : num  3.48e-10 1.88e-08 3.26e-07 7.93e-07 1.04e-06 ...
##  $ p.adjust      : num  1.08e-06 2.91e-05 3.37e-04 6.13e-04 6.45e-04 ...
##  $ qvalue        : num  8.18e-07 2.21e-05 2.56e-04 4.66e-04 4.90e-04 ...
##  $ geneID        : chr  "Ntn4/Tacstd2/Jag1/Fermt1/Ccn1/Thbs1/Kank1/Rhod/Plau/Fzd4/Dbn1/Epb41l5/Itgb6/Lamb1/L1cam/Fam107a/Itgb8/Tnfrsf12a" "Tacstd2/Jag1/Fermt1/Ccn1/Thbs1/Kank1/Rhod/Plau/Fzd4/Dbn1/Epb41l5/L1cam/Fam107a" "B4galnt1/Ggt1/Ccn1/B4galt6/Acot9/Gal3st1/Pipox/Nr1h4/Acat1/Acsl1/Slc1a1/Pdk4/Itgb8/Mme/Nudt19/Cs" "Mrap2/Gabarapl1/Ppargc1a/Cycs/Idh2/Got1/Idh3a/Eva1a/Aco2/Cs/Ndufs2/Pfkm/Slc25a12/Trap1" ...
##  $ Count         : int  18 13 16 14 13 15 7 8 9 13 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
 chart3 <- dotplot(pos_go3) +
    ggtitle("type 3") +
    theme_classic() + 
    theme(
        plot.title = element_text(hjust = 0.5),
        legend.position = "left",
        axis.text.y = element_text(hjust = 0, size = 10)) +
    scale_y_discrete(position = "right", 
                     labels = function(x) str_wrap(x, width = 25))  # Wrap y-axis labels to 2 li
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
 chart3

2.3.1 Viewing Type 3 Genes

VlnPlot(SO4, features = c("Jun","Fos","Socs3","Hspb1","Dusp1","Trib1"))

2.4 Type 4

df<- type_4_markers %>% arrange(desc(avg_log2FC))


df2 <- df %>% filter(p_val_adj < 0.05)


DEG_list <- df2

markers4 <- DEG_list %>% mutate(SYMBOL = gene)


ENTREZ_list <- bitr(
  geneID = DEG_list$gene,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Mm.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneID = DEG_list$gene, fromType = "SYMBOL", toType =
## "ENTREZID", : 40% of input gene IDs are fail to map...
markers4 <-  ENTREZ_list %>% inner_join(markers4, by = "SYMBOL")

# Removing genes that are not statistically significant. 
markers4 <-  markers4 %>% dplyr::filter(p_val_adj < 0.05)
#head(markers, n = 50)

pos.markers4 <-  markers4 %>% dplyr::filter(avg_log2FC > 0) %>%  arrange(desc(abs(avg_log2FC)))
#head(pos.markers, n = 50)

pos.ranks4 <- pos.markers4$ENTREZID[abs(pos.markers4$avg_log2FC) > 0]
#head(pos.ranks)

pos_go4 <- enrichGO(gene = pos.ranks4,           #a vector of entrez gene id
                   OrgDb = "org.Mm.eg.db",    
                   ont = "BP",
                   readable = TRUE)              #whether mapping gene ID to gene Name

pos_go4
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:3] "15945" "15957" "100038882"
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...167 enriched terms found
## 'data.frame':    167 obs. of  12 variables:
##  $ ID            : chr  "GO:0051607" "GO:0009615" "GO:0042118" "GO:0010819" ...
##  $ Description   : chr  "defense response to virus" "response to virus" "endothelial cell activation" "regulation of T cell chemotaxis" ...
##  $ GeneRatio     : chr  "3/3" "3/3" "1/3" "1/3" ...
##  $ BgRatio       : chr  "329/28928" "412/28928" "12/28928" "15/28928" ...
##  $ RichFactor    : num  0.00912 0.00728 0.08333 0.06667 0.05263 ...
##  $ FoldEnrichment: num  87.9 70.2 803.6 642.8 507.5 ...
##  $ zScore        : num  16.1 14.4 28.3 25.3 22.5 ...
##  $ pvalue        : num  1.46e-06 2.87e-06 1.24e-03 1.55e-03 1.97e-03 ...
##  $ p.adjust      : num  0.000248 0.000248 0.026047 0.026047 0.026047 ...
##  $ qvalue        : num  4.53e-06 4.53e-06 4.75e-04 4.75e-04 4.75e-04 ...
##  $ geneID        : chr  "Cxcl10/Ifit1/Isg15" "Cxcl10/Ifit1/Isg15" "Cxcl10" "Cxcl10" ...
##  $ Count         : int  3 3 1 1 1 1 1 1 1 1 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
 chart4 <- dotplot(pos_go4) +
    ggtitle("type 4") +
    theme_classic() + 
    theme(
        plot.title = element_text(hjust = 0.5),
        legend.position = "left",
        axis.text.y = element_text(hjust = 0, size = 10)) +
    scale_y_discrete(position = "right", 
                     labels = function(x) str_wrap(x, width = 25))  # Wrap y-axis labels to 2 li
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
 chart4

2.4.1 View Type 4 Genes

VlnPlot(SO4, features = c("Cxcl10","Isg15","Ifit1"))

2.5 Type 5

df<- type_5_markers %>% arrange(desc(avg_log2FC))


df2 <- df %>% filter(p_val_adj < 0.05)


DEG_list <- df2

DEG_list$gene[DEG_list$gene == "Il1f6"] <- "Il36a"

markers5 <- DEG_list %>% mutate(SYMBOL = gene)


ENTREZ_list <- bitr(
  geneID = DEG_list$gene,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Mm.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
markers5 <-  ENTREZ_list %>% inner_join(markers5, by = "SYMBOL")

# Removing genes that are not statistically significant. 
markers5 <-  markers5 %>% dplyr::filter(p_val_adj < 0.05)
#head(markers, n = 50)

pos.markers5 <-  markers5 %>% dplyr::filter(avg_log2FC > 0) %>%  arrange(desc(abs(avg_log2FC)))
#head(pos.markers, n = 50)

pos.ranks5 <- pos.markers5$ENTREZID[abs(pos.markers5$avg_log2FC) > 0]
#head(pos.ranks)

pos_go5 <- enrichGO(gene = pos.ranks5,           #a vector of entrez gene id
                   OrgDb = "org.Mm.eg.db",    
                   ont = "BP",
                   readable = TRUE)              #whether mapping gene ID to gene Name

pos_go5
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:18] "54448" "269209" "105180375" "70337" "12156" "53420" "72309" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...28 enriched terms found
## 'data.frame':    28 obs. of  12 variables:
##  $ ID            : chr  "GO:0014888" "GO:0003300" "GO:0014897" "GO:0014896" ...
##  $ Description   : chr  "striated muscle adaptation" "cardiac muscle hypertrophy" "striated muscle hypertrophy" "muscle hypertrophy" ...
##  $ GeneRatio     : chr  "3/17" "3/17" "3/17" "3/17" ...
##  $ BgRatio       : chr  "59/28928" "122/28928" "126/28928" "128/28928" ...
##  $ RichFactor    : num  0.0508 0.0246 0.0238 0.0234 0.0207 ...
##  $ FoldEnrichment: num  86.5 41.8 40.5 39.9 35.2 ...
##  $ zScore        : num  15.9 11 10.8 10.7 10 ...
##  $ pvalue        : num  5.37e-06 4.77e-05 5.25e-05 5.50e-05 7.97e-05 ...
##  $ p.adjust      : num  0.00452 0.01157 0.01157 0.01157 0.01342 ...
##  $ qvalue        : num  0.00221 0.00566 0.00566 0.00566 0.00656 ...
##  $ geneID        : chr  "Bmp2/Gsn/Lmna" "Bmp2/Gsn/Lmna" "Bmp2/Gsn/Lmna" "Bmp2/Gsn/Lmna" ...
##  $ Count         : int  3 3 3 3 3 4 2 2 2 2 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
 chart5 <- dotplot(pos_go5) +
    ggtitle("type 5") +
    theme_classic() + 
    theme(
        plot.title = element_text(hjust = 0.5),
        legend.position = "left",
        axis.text.y = element_text(hjust = 0, size = 10)) +
    scale_y_discrete(position = "right", 
                     labels = function(x) str_wrap(x, width = 25))  # Wrap y-axis labels to 2 li
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
 chart5

## Type 6

df<- type_6_markers %>% arrange(desc(avg_log2FC))


df2 <- df %>% filter(p_val_adj < 0.05)


DEG_list <- df2

DEG_list$gene[DEG_list$gene == "Il1f6"] <- "Il36a"

markers6 <- DEG_list %>% mutate(SYMBOL = gene)


ENTREZ_list <- bitr(
  geneID = DEG_list$gene,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Mm.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
markers6 <-  ENTREZ_list %>% inner_join(markers6, by = "SYMBOL")

# Removing genes that are not statistically significant. 
markers6 <-  markers6 %>% dplyr::filter(p_val_adj < 0.05)
#head(markers, n = 50)

pos.markers6 <-  markers6 %>% dplyr::filter(avg_log2FC > 0) %>%  arrange(desc(abs(avg_log2FC)))
#head(pos.markers, n = 50)

pos.ranks6 <- pos.markers6$ENTREZID[abs(pos.markers6$avg_log2FC) > 0]
#head(pos.ranks)

pos_go6 <- enrichGO(gene = pos.ranks6,           #a vector of entrez gene id
                   OrgDb = "org.Mm.eg.db",    
                   ont = "BP",
                   readable = TRUE)              #whether mapping gene ID to gene Name

pos_go6
## #
## # over-representation test
## #
## #...@organism     Mus musculus 
## #...@ontology     BP 
## #...@keytype      ENTREZID 
## #...@gene     chr [1:48] "626578" "100039796" "60440" "14469" "667597" "15953" "55932" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05 
## #...257 enriched terms found
## 'data.frame':    257 obs. of  12 variables:
##  $ ID            : chr  "GO:0035456" "GO:0035458" "GO:0051607" "GO:0009615" ...
##  $ Description   : chr  "response to interferon-beta" "cellular response to interferon-beta" "defense response to virus" "response to virus" ...
##  $ GeneRatio     : chr  "15/47" "12/47" "16/47" "16/47" ...
##  $ BgRatio       : chr  "77/28928" "68/28928" "329/28928" "412/28928" ...
##  $ RichFactor    : num  0.1948 0.1765 0.0486 0.0388 0.0901 ...
##  $ FoldEnrichment: num  119.9 108.6 29.9 23.9 55.4 ...
##  $ zScore        : num  42.1 35.8 21.3 18.9 23.2 ...
##  $ pvalue        : num  3.92e-28 5.00e-22 5.94e-20 2.15e-18 2.10e-15 ...
##  $ p.adjust      : num  3.01e-25 1.92e-19 1.52e-17 4.14e-16 3.23e-13 ...
##  $ qvalue        : num  1.53e-25 9.73e-20 7.72e-18 2.10e-16 1.64e-13 ...
##  $ geneID        : chr  "Tgtp2/Iigp1/Gbp2/Ifi47/Gbp3/Ifitm3/Ifit3/Stat1/Igtp/Irgm2/Oas1g/Ifit1/Irgm1/Xaf1/Bst2" "Tgtp2/Iigp1/Gbp2/Ifi47/Gbp3/Ifit3/Stat1/Igtp/Irgm2/Oas1g/Ifit1/Irgm1" "Gbp2/Ifitm3/Irf7/Oasl2/Ifit3/Stat1/Igtp/Irgm2/Rtp4/Oas1g/Isg15/Ifit3b/Usp18/Ifit1/Irgm1/Bst2" "Gbp2/Ifitm3/Irf7/Oasl2/Ifit3/Stat1/Igtp/Irgm2/Rtp4/Oas1g/Isg15/Ifit3b/Usp18/Ifit1/Irgm1/Bst2" ...
##  $ Count         : int  15 12 16 16 10 8 8 8 10 10 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
 chart6 <- dotplot(pos_go6) +
    ggtitle("type 6") +
    theme_classic() + 
    theme(
        plot.title = element_text(hjust = 0.5),
        legend.position = "left",
        axis.text.y = element_text(hjust = 0, size = 10)) +
    scale_y_discrete(position = "right", 
                     labels = function(x) str_wrap(x, width = 25))  # Wrap y-axis labels to 2 li
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
 chart6

3 All 4 pathways next to eachother

chart1

chart2

chart3

chart4

chart5

chart6

4 Saving Excel File

pathway_list <- list(
  "type 1" = pos_go1@result,
  "type 2" = pos_go2@result,
  "type 3" = pos_go3@result,
  "type 4" = pos_go4@result,
  "type 5" = pos_go4@result,
  "type 6" = pos_go4@result
)


wb <- createWorkbook()


for (sheet_name in names(pathway_list)) {
  addWorksheet(wb, sheet_name)
  writeData(wb, sheet_name, pathway_list[[sheet_name]])
}

# Save the workbook
saveWorkbook(wb, here("jk_code", "supergroup_MD_DEGs.xlsx"), overwrite = TRUE)